# The data to scrape must come from an HTML page, not from a JSON response.
import re
from pathlib import Path

import requests
from lxml import etree
# Browser-like User-Agent sent with every request (page and posters).
# NOTE(review): presumably needed because the site rejects the default
# client User-Agent - confirm.
headers = {
    'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36'
}
url = "https://movie.douban.com"

# Seconds to wait for a server before giving up; without a timeout
# requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT = 10

# Directory the poster images are written to (relative to the working dir).
IMG_DIR = Path("img")


def _safe_filename(text):
    """Return *text* with characters that are unsafe in file names replaced.

    Path separators, characters reserved on Windows and line breaks/tabs are
    each replaced by an underscore; surrounding whitespace is stripped.
    """
    return re.sub(r'[\\/:*?"<>|\r\n\t]', "_", text).strip()


resp = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
# Fail loudly on 4xx/5xx instead of parsing an error page.
resp.raise_for_status()

# Parse the HTML document into an element tree that supports XPath queries.
enode = etree.HTML(resp.text)

# Extract the fields slide by slide so that a slide lacking a title, rating
# or poster cannot shift the pairing of every slide that follows it (which
# is what happens when three document-wide lists are zipped together).
names, rates, imgs = [], [], []
for item in enode.xpath('//li[@class="ui-slide-item s"]'):
    title = item.xpath('.//li[@class="title"]/a/text()')
    # The third <li> holds several <span>s; keep only the one with text.
    score = item.xpath('./ul/li[3]/span[string(.) != ""]/text()')
    poster = item.xpath('.//li[@class="poster"]//img/@src')
    if not (title and score and poster):
        continue
    names.append(title[0].strip())
    rates.append(score[0].strip())
    imgs.append(poster[0])

# open()/write would raise FileNotFoundError if the directory were missing.
IMG_DIR.mkdir(parents=True, exist_ok=True)

for name, rate, img in zip(names, rates, imgs):
    print(f"{name}:{rate}-->{img}")
    try:
        img_res = requests.get(img, headers=headers, timeout=REQUEST_TIMEOUT)
        img_res.raise_for_status()
    except requests.RequestException as exc:
        # One failed poster should not abort the remaining downloads, and an
        # error body must not be saved as if it were a JPEG.
        print(f"skipped {name}: {exc}")
        continue
    filename = IMG_DIR / f"{_safe_filename(name)}--{_safe_filename(rate)}.jpg"
    filename.write_bytes(img_res.content)